1 Background

We compare the inference results from TMB, aghq and tmbstan. Import these inference results as follows:

tmb <- readRDS("depends/tmb.rds")
aghq <- readRDS("depends/aghq.rds")
adam <- readRDS("depends/adam.rds")
tmbstan <- readRDS("depends/tmbstan.rds")

For more information about the conditions under which these results were generated, see:

depends <- yaml::read_yaml("orderly.yml")$depends

for(i in seq_along(depends)) {
  report_name <- names(depends[[i]])
  print(paste0("Inference results obtained from ", report_name, " with the query ", depends[[i]][[report_name]]$id))
  report_id <- orderly::orderly_search(query = depends[[i]][[report_name]]$id, report_name)
  print(paste0("Obtained report had ID ", report_id, " and was run with the following parameters:"))
  print(orderly::orderly_info(report_id, report_name)$parameters)
}
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:tmb == TRUE)"
## [1] "Obtained report had ID 20230119-142551-23704e7b and was run with the following parameters:"
## $tmb
## [1] TRUE
## 
## $aghq
## [1] FALSE
## 
## $k
## [1] 2
## 
## $ndConstruction
## [1] "product"
## 
## $tmbstan
## [1] FALSE
## 
## $niter
## [1] 1000
## 
## $nthin
## [1] 1
## 
## $nsample
## [1] 1000
## 
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:aghq == TRUE && parameter:k == 1)"
## [1] "Obtained report had ID 20230119-170459-9b07786e and was run with the following parameters:"
## $aghq
## [1] TRUE
## 
## $k
## [1] 1
## 
## $ndConstruction
## [1] "product"
## 
## $tmb
## [1] FALSE
## 
## $tmbstan
## [1] FALSE
## 
## $niter
## [1] 1000
## 
## $nthin
## [1] 1
## 
## $nsample
## [1] 1000
## 
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:adam == TRUE)"
## [1] "Obtained report had ID 20230204-171524-7233e968 and was run with the following parameters:"
## $adam
## [1] TRUE
## 
## $tmb
## [1] FALSE
## 
## $aghq
## [1] FALSE
## 
## $k
## [1] 2
## 
## $ndConstruction
## [1] "product"
## 
## $tmbstan
## [1] FALSE
## 
## $niter
## [1] 1000
## 
## $nthin
## [1] 1
## 
## $nsample
## [1] 1000
## 
## $area_level
## [1] 4
## 
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:tmbstan == TRUE)"
## [1] "Obtained report had ID 20230114-142916-efabd65d and was run with the following parameters:"
## $tmbstan
## [1] TRUE
## 
## $niter
## [1] 20000
## 
## $nthin
## [1] 20
## 
## $tmb
## [1] FALSE
## 
## $aghq
## [1] FALSE
## 
## $k
## [1] 2
## 
## $nsample
## [1] 1000

All of the possible parameter names are as follows:

unique(names(tmb$fit$obj$env$par))
##  [1] "beta_rho"             "beta_alpha"           "beta_lambda"          "beta_anc_rho"         "beta_anc_alpha"       "logit_phi_rho_x"      "log_sigma_rho_x"      "logit_phi_rho_xs"    
##  [9] "log_sigma_rho_xs"     "logit_phi_rho_a"      "log_sigma_rho_a"      "logit_phi_rho_as"     "log_sigma_rho_as"     "log_sigma_rho_xa"     "u_rho_x"              "us_rho_x"            
## [17] "u_rho_xs"             "us_rho_xs"            "u_rho_a"              "u_rho_as"             "logit_phi_alpha_x"    "log_sigma_alpha_x"    "logit_phi_alpha_xs"   "log_sigma_alpha_xs"  
## [25] "logit_phi_alpha_a"    "log_sigma_alpha_a"    "logit_phi_alpha_as"   "log_sigma_alpha_as"   "log_sigma_alpha_xa"   "u_alpha_x"            "us_alpha_x"           "u_alpha_xs"          
## [33] "us_alpha_xs"          "u_alpha_a"            "u_alpha_as"           "u_alpha_xa"           "OmegaT_raw"           "log_betaT"            "log_sigma_lambda_x"   "ui_lambda_x"         
## [41] "log_sigma_ancrho_x"   "log_sigma_ancalpha_x" "ui_anc_rho_x"         "ui_anc_alpha_x"       "log_sigma_or_gamma"   "log_or_gamma"

2 Time taken

data.frame(
  "TMB" = tmb$time,
  "aghq" = aghq$time,
  "adam" = adam$time,
  "tmbstan" = tmbstan$time
)
adam <- adam$adam

3 Histogram

beta_rho <- histogram_plot("beta_rho", i = 1, return_df = TRUE)
beta_rho$plot

saveRDS(beta_rho$df, "beta_rho.rds")

histogram_plot("beta_anc_rho")

4 KS

4.1 Individual parameters

ks_helper <- function(par, ...) {
  to_ks_df(par) %>% ks_plot(par, ...)
}

ks_helper("beta")

ks_helper("logit")

ks_helper("log_sigma")

ks_helper("u_rho_x")

ks_helper("u_rho_x", method1 = "TMB", method2 = "adam")

ks_helper("u_rho_xs")

ks_helper("u_rho_xs", method1 = "TMB", method2 = "adam")

ks_helper("us_rho_x")

ks_helper("us_rho_x", method1 = "TMB", method2 = "adam")

ks_helper("us_rho_xs")

ks_helper("us_rho_xs", method1 = "TMB", method2 = "adam")

ks_helper("u_rho_a")

ks_helper("u_rho_a", method1 = "TMB", method2 = "adam")

ks_helper("u_rho_as")

ks_helper("u_rho_as", method1 = "TMB", method2 = "adam")

ks_helper("u_alpha_x")

ks_helper("u_alpha_x", method1 = "TMB", method2 = "adam")

ks_helper("u_alpha_xs")

ks_helper("u_alpha_xs", method1 = "TMB", method2 = "adam")

ks_helper("us_alpha_x")

ks_helper("us_alpha_x", method1 = "TMB", method2 = "adam")

ks_helper("us_alpha_xs")

ks_helper("us_alpha_xs", method1 = "TMB", method2 = "adam")

ks_helper("u_alpha_a")

ks_helper("u_alpha_a", method1 = "TMB", method2 = "adam")

ks_helper("u_alpha_as")

ks_helper("u_alpha_as", method1 = "TMB", method2 = "adam")

ks_helper("u_alpha_xa")

ks_helper("u_alpha_xa", method1 = "TMB", method2 = "adam")

ks_helper("ui_anc_rho_x")

ks_helper("ui_anc_rho_x", method1 = "TMB", method2 = "adam")

ks_helper("ui_anc_alpha_x")

ks_helper("ui_anc_alpha_x", method1 = "TMB", method2 = "adam")

ks_helper("log_or_gamma")

ks_helper("log_or_gamma", method1 = "TMB", method2 = "adam")

4.2 Summary table

ks_summary <- lapply(unique(names(tmb$fit$obj$env$par)), function(x) {
  to_ks_df(x) %>%
    group_by(method) %>%
    summarise(ks = mean(ks), par = x)
}) %>%
  bind_rows() %>%
  pivot_wider(names_from = "method", values_from = "ks") %>%
  rename(
    "Parameter" = "par",
    "KS(aghq, tmbstan)" = "aghq",
    "KS(TMB, tmbstan)" = "TMB",
  )

ks_summary %>%
  gt::gt() %>%
  gt::fmt_number(
    columns = starts_with("KS"),
    decimals = 3
  )
Parameter adam KS(aghq, tmbstan) KS(TMB, tmbstan)
beta_rho 0.09425000 0.090 0.085
beta_alpha 0.11500000 0.101 0.103
beta_lambda 0.04150000 0.061 0.074
beta_anc_rho 0.04300000 0.105 0.090
beta_anc_alpha 0.08850000 0.044 0.025
logit_phi_rho_x 0.53575000 0.536 0.118
log_sigma_rho_x 0.64616667 0.646 0.195
logit_phi_rho_xs 0.50950000 0.510 0.111
log_sigma_rho_xs 0.61300000 0.613 0.203
logit_phi_rho_a 0.55250000 0.552 0.080
log_sigma_rho_a 0.58500000 0.585 0.113
logit_phi_rho_as 0.56850000 0.569 0.104
log_sigma_rho_as 0.58900000 0.589 0.134
log_sigma_rho_xa 0.66950000 0.669 0.224
u_rho_x 0.09125000 0.092 0.095
us_rho_x 0.06360156 0.062 0.062
u_rho_xs 0.12268750 0.117 0.121
us_rho_xs 0.04482813 0.037 0.044
u_rho_a 0.09127500 0.087 0.083
u_rho_as 0.09370000 0.084 0.076
logit_phi_alpha_x 0.53125000 0.531 0.107
log_sigma_alpha_x 0.55733333 0.557 0.143
logit_phi_alpha_xs 0.55100000 0.551 0.143
log_sigma_alpha_xs 0.54050000 0.540 0.159
logit_phi_alpha_a 0.52500000 0.525 0.083
log_sigma_alpha_a 0.54125000 0.541 0.111
logit_phi_alpha_as 0.51650000 0.516 0.081
log_sigma_alpha_as 0.51400000 0.514 0.098
log_sigma_alpha_xa 0.62700000 0.627 0.146
u_alpha_x 0.09606250 0.096 0.089
us_alpha_x 0.08746875 0.084 0.083
u_alpha_xs 0.10857812 0.101 0.096
us_alpha_xs 0.10553125 0.097 0.095
u_alpha_a 0.07565217 0.085 0.084
u_alpha_as 0.10035000 0.096 0.102
u_alpha_xa 0.06389063 0.069 0.059
OmegaT_raw 0.51800000 0.518 0.022
log_betaT 0.68900000 0.689 0.244
log_sigma_lambda_x 0.73600000 0.736 0.269
ui_lambda_x 0.15881250 0.158 0.160
log_sigma_ancrho_x 0.50350000 0.504 0.037
log_sigma_ancalpha_x 0.51950000 0.519 0.083
ui_anc_rho_x 0.04818750 0.055 0.057
ui_anc_alpha_x 0.06628125 0.063 0.065
log_sigma_or_gamma 0.52450000 0.524 0.036
log_or_gamma 0.04489063 0.059 0.060
ggplot(ks_summary, aes(x = `KS(TMB, tmbstan)`, y = `KS(aghq, tmbstan)`)) +
  geom_point(alpha = 0.5) +
  xlim(0, 1) +
  ylim(0, 1) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  labs(x = "KS(TMB, tmbstan)", y = "KS(aghq, tmbstan)") +
  theme_minimal() 

ks_summary %>%
  mutate(`KS difference` = `KS(TMB, tmbstan)` - `KS(aghq, tmbstan)`) %>%
  ggplot(aes(x = `KS difference`)) +
    geom_boxplot(width = 0.5) +
    coord_flip() +
    labs(x = "KS(TMB, tmbstan) - KS(aghq, tmbstan)") +
    theme_minimal()

4.3 Investigation into large KS values

The following parameters have KS values greater than 0.5 in both dimensions.

(big_ks <- ks_summary %>%
  filter(`KS(aghq, tmbstan)` + `KS(TMB, tmbstan)` > 0.5) %>%
  pull(Parameter))
##  [1] "logit_phi_rho_x"      "log_sigma_rho_x"      "logit_phi_rho_xs"     "log_sigma_rho_xs"     "logit_phi_rho_a"      "log_sigma_rho_a"      "logit_phi_rho_as"     "log_sigma_rho_as"    
##  [9] "log_sigma_rho_xa"     "logit_phi_alpha_x"    "log_sigma_alpha_x"    "logit_phi_alpha_xs"   "log_sigma_alpha_xs"   "logit_phi_alpha_a"    "log_sigma_alpha_a"    "logit_phi_alpha_as"  
## [17] "log_sigma_alpha_as"   "log_sigma_alpha_xa"   "OmegaT_raw"           "log_betaT"            "log_sigma_lambda_x"   "log_sigma_ancrho_x"   "log_sigma_ancalpha_x" "log_sigma_or_gamma"

Let’s look into this further by plotting the histograms:

lapply(big_ks, histogram_plot)
## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL
## 
## [[9]]
## NULL
## 
## [[10]]
## NULL
## 
## [[11]]
## NULL
## 
## [[12]]
## NULL
## 
## [[13]]
## NULL
## 
## [[14]]
## NULL
## 
## [[15]]
## NULL
## 
## [[16]]
## NULL
## 
## [[17]]
## NULL
## 
## [[18]]
## NULL
## 
## [[19]]
## NULL
## 
## [[20]]
## NULL
## 
## [[21]]
## NULL
## 
## [[22]]
## NULL
## 
## [[23]]
## NULL
## 
## [[24]]
## NULL

5 MMD

#' To write!

6 PSIS

Suppose we have two sets of samples from the posterior. For each sample we are going to want to evaluate the log-likelihood, so that we can calculate the log-likelihood ratios. We can extract the TMB objective function for the log-likelihood as follows:

tmb$fit$obj$fn()
## [1] NaN